# author: Han Zhang
library(ggplot2)
# Read the space-separated, header-less coverage table. The three columns are
# named num / keyword / prop below; the axis labels further down present `num`
# as the keyword-dictionary size and `prop` as the proportion of posts covered.
# The data frame is deliberately not called `c`, which would shadow base::c().
coverage <- read.csv(
  "keywordCoverageSet.txt",
  stringsAsFactors = FALSE,
  header = FALSE,
  sep = " "
)
names(coverage) <- c("num", "keyword", "prop")

# Scatter plot of coverage proportion against dictionary size, with a
# dot-dashed vertical reference line at x = 50.
coverage_plot <- ggplot(coverage, aes(x = num, y = prop)) +
  geom_point(size = 0.2) +
  geom_vline(xintercept = 50, linetype = "dotdash") +
  # Fixed axis ranges: rows falling outside these limits are not drawn.
  scale_y_continuous(breaks = seq(0, 1, 0.05), limits = c(0, 1)) +
  scale_x_continuous(breaks = seq(0, 1000, 50), limits = c(0, 1000)) +
  xlab("Size of Keyword Dictionary K") +
  ylab("Proportion of Wickedonna posts that contains words in K") +
  # theme_bw() is a complete theme: it replaces every theme element set before
  # it. It must therefore come first, with the axis overrides applied on top;
  # in the reverse order the custom sizes and colours are silently discarded.
  theme_bw() +
  theme(
    axis.text.y = element_text(size = 16, colour = "black"),
    axis.title.y = element_text(size = 14, colour = "black"),
    axis.text.x = element_text(size = 16, colour = "black"),
    axis.title.x = element_text(size = 15, colour = "black")
  )

# Bare symbol at top level: auto-prints the plot exactly when the original
# unassigned ggplot expression would have (interactive session, Rscript).
coverage_plot

# Pass the plot explicitly rather than relying on ggplot2's last_plot() state.
ggsave(
  "keyword_coverage_wickedonna.pdf",
  plot = coverage_plot,
  device = "pdf",
  width = 6,
  height = 5,
  units = "in"
)
